
version 15
capture log close
set more off
clear
clear matrix
clear mata

* ---------------------------------------------------------------------------
* Per-user project root.
* The OS may report the login name in upper or lower case, so the comparison
* is done on a lower-cased copy. Any name that matched before still matches.
* ---------------------------------------------------------------------------
local user = lower(c(username))

if "`user'" == "wb485280" {
	glo rootdir		"C:\Users\wb485280\OneDrive - WBG\radicalization"
}
if "`user'" == "wb382635" {
	glo rootdir		"C:\Users\wb382635\Dropbox\Unemp & daesh"
}
if "`user'" == "wb452275" {
	glo rootdir		"C:\Users\WB452275\Dropbox\Projects\Unemp & daesh"
}
if "`user'" == "sarurchaudhary" {
	glo rootdir		"/Users/sarurchaudhary/Dropbox/Unemp & daesh"
}

* Stop early with an explicit message when no root directory is available,
* instead of failing later on a cd to a path built from an empty global.
if "${rootdir}" == "" {
	display as error "rootdir is not set: add a path for user `c(username)' at the top of this do-file"
	exit 601
}

* Folders derived from the project root.
glo datadir		"${rootdir}/Data/Raw data"
glo outdir		"${rootdir}/Data/Working datasets"
glo dodir		"${rootdir}/Dofiles"

cd "${outdir}"

/******************************************************************************************************
We clean the last file, which is the result of the matching done in MATLAB
and of the cleaning of the job variable into the international classification
in the do-file "Job ISCO classification".

For all variables: information missing from the original file is coded as . or "".
For information that is present in said file but could not be used (too vague, nonsensical, etc.) we use "Unknown".
******************************************************************************************************/

* Move to the folder that holds the raw workbook and load sheet "Feuil1",
* taking the first row as variable names. Any data in memory is discarded.
* NOTE(review): this cd supersedes the earlier cd to the outdir folder, so
* files saved later with a relative name are written to this folder.
cd "\\tsclient\L\stata2014\ISIS_fighters"
import excel using "data_fighters.xls", sheet("Feuil1") firstrow clear

//drop if country_origin == "Unknown"

* Nationality-based overrides. A "?" in Nationality maps to Saudi Arabia and
* the spelling "Kenia" maps to Kenya; the Kenya rule is applied last, so it wins.
replace country_origin = "Saudi Arabia" if strpos(Nationality, "?") > 0
replace country_origin = "Kenya"        if strpos(Nationality, "Kenia") > 0

* "Unknown" becomes empty, while "Turkestan" becomes "Unknown".
* Both recodes are done in one pass so the second cannot feed the first.
replace country_origin = cond(country_origin == "Turkestan", "Unknown", "") ///
    if inlist(country_origin, "Unknown", "Turkestan")

/************************
Cleaning the aspirations
*************************/

* Keyword rules on the raw text. Administrative keywords are applied first,
* then fighter keywords, so a fighter keyword overrides an administrative one.
foreach kw in "Media" "Manufacturing" "Salesman" "smith" "isitor" {
    replace Aspiration_cl = "Administrative" if strpos(Aspiration_ISIS, "`kw'") > 0
}
foreach kw in "Tank" "issile" "ecurity" {
    replace Aspiration_cl = "Fighter" if strpos(Aspiration_ISIS, "`kw'") > 0
}

* Harmonise category names.
replace Aspiration_cl = "Suicide"        if Aspiration_cl == "Suicidal"
replace Aspiration_cl = "Fighters"       if Aspiration_cl == "Fighter"
replace Aspiration_cl = "Administrative" if Aspiration_cl == "Legislation adviser"

* "Unknown" and "None" are blanked out.
replace Aspiration_cl = "" if inlist(Aspiration_cl, "Unknown", "None")

* Raw value "Suicid_fighter" is carried over as its own category.
replace Aspiration_cl = "Suicid_fighter" if Aspiration_ISIS == "Suicid_fighter"

* Placeholder left over from data entry becomes "Unknown". This runs after
* the blanking step above so that these rows keep the value "Unknown".
replace Aspiration_cl = "Unknown" if Aspiration_cl == "to be filled"

/*************************
 Cleaning Educations
**************************/

* Collapse the combined category into "Primary".
replace Education_cl = "Primary" if Education_cl == "Preparatory/Primary"

* Disabled recodes, kept for reference:
//replace Education_cl = "Primary" if Education_cl == "Elementary"
// replace Education_cl = "Secondary" if Education_cl == "Mid-Level"
//replace Education_cl = "No education" if Education_cl == "Illiterate"

* Both "Not clarified" and the data-entry placeholder map to "Unknown".
replace Education_cl = "Unknown" if inlist(Education_cl, "Not clarified", "to be filled")

/**************************************************
Cleaning year of birth. The variable is read as a string;
it is converted to numeric and implausible years are set
to missing.
***************************************************/

* Manual corrections for two records, identified by ID.
replace YearofBirth = "1982" if ID == 3195
replace YearofBirth = "1986" if ID == 3196

* Entries containing a question mark are treated as missing.
replace YearofBirth = "." if strpos(YearofBirth, "?") > 0

* Convert to numeric; the force option turns any remaining non-numeric text into missing.
destring YearofBirth, replace force

* Keep only years in 1900-2014. Missing values fall outside the range and stay missing.
replace YearofBirth = . if !inrange(YearofBirth, 1900, 2014)

/************************
Cleaning Marital
************************/
* "Unknown" is blanked first; "Not clarified" then takes the value "Unknown".
replace CivilStatus_cl = ""        if CivilStatus_cl == "Unknown"
replace CivilStatus_cl = "Unknown" if CivilStatus_cl == "Not clarified"

/************************
Cleaning Religious
************************/
replace ReligiousLevel_cl = "" if ReligiousLevel_cl == "None"

/***********************
Cleaning Experience
***********************/
* "Unknown" is blanked, while "None" is a substantive answer recoded to "No".
replace Experience_cl = ""   if Experience_cl == "Unknown"
replace Experience_cl = "No" if Experience_cl == "None"

* The data-entry placeholder becomes "Unknown" in all three variables.
* This runs after the blanking steps above so these rows keep "Unknown".
foreach v of varlist CivilStatus_cl ReligiousLevel_cl Experience_cl {
    replace `v' = "Unknown" if `v' == "to be filled"
}

/**********************************************
Dropping intermediate variables and duplicates
***********************************************/

* Remove the raw (uncleaned) columns and the record identifier.
drop replacements* Nationality Education CivilStatus ReligiousLevel Aspiration_ISIS Job ExperienceinJihad

drop ID

* Variables that define a duplicate record. The cleaned _cl names are spelled
* out in full: the raw CivilStatus and ReligiousLevel columns were dropped just
* above, so the short names would only resolve through variable abbreviation
* and would fail under "set varabbrev off".
* NOTE(review): Job_cl and final_isco are not part of the key, so records that
* differ only in those variables are collapsed - confirm this is intended.
local dedup_keys country_origin Education_cl CivilStatus_cl ReligiousLevel_cl Aspiration_cl Experience_cl YearofBirth Country_resid Point_entry

* A stable sort keeps tied observations in their current order, so the record
* retained within each group is the same on every run.
sort `dedup_keys', stable
by `dedup_keys': keep if _n == 1


/*
foreach x of varlist country_origin Education_cl CivilStatus ReligiousLevel Aspiration_cl Experience_cl Country_resid Point_entry Job_cl {
  replace `x' = "" if (`x' == "Unknown")
}
*/

/***********************************************
Renaming the cleaned variables and labeling them
***********************************************/

* The cleaned columns take over the short names, all in a single rename.
rename (Education_cl Aspiration_cl Experience_cl Job_cl ReligiousLevel_cl CivilStatus_cl country_origin final_isco) ///
       (Education    Aspiration    Experience    Job    ReligiousLevel    CivilStatus    Country_origin Job_survey)

* Variable labels.
label variable Education      "Education before joining"
label variable Aspiration     "Prefered role"
label variable Experience     "Experience with Jihad"
label variable Job            "Occupation before joining"
label variable ReligiousLevel "Knowledge in religion"
label variable CivilStatus    "Marital status"
label variable Country_origin "Nationality"
label variable Country_resid  "Country of Residence"
label variable Point_entry    "Final transit point"
label variable Job_survey     "Occupation matched to surveys"

/*************************************************
Country of residence "Unknown" is actually missing
**************************************************/

replace Country_resid = "" if Country_resid == "Unknown"

/************************************************************************
Using the variables as matched to classification commonly used in surveys
*************************************************************************/

* Value labels for the survey-style occupation codes stored in Job_survey.
* Spelling of the label text has been corrected; the numeric codes are unchanged.
label define Job_classification ///
    1  "employer/manager of establishment with 10 or more employees" ///
    2  "employer/manager of establishment with less than 10 employees" ///
    3  "professional worker lawyer, accountant, teacher, etc" ///
    4  "manual worker" ///
    5  "agricultural worker/owner of a farm" ///
    6  "member of armed forces/public security" ///
    7  "owner of a shop/grocery store" ///
    8  "Government employee" ///
    9  "private sector employee" ///
    10 "craftsperson" ///
    94 "Illegal" ///
    95 "Student" ///
    96 "Retired" ///
    97 "No work"
label values Job_survey Job_classification

* Numeric education code. 0 is the default for any value not matched below,
* including empty strings.
gen Education_survey = 0

* Codes 1-5 follow the order of this list.
local code = 0
foreach level in "Illiterate" "Elementary" "Primary" "Secondary" "Mid-Level" {
    local ++code
    replace Education_survey = `code' if Education == "`level'"
}

* Any value containing "niversity" is coded 6; "Unknown" is coded 7.
replace Education_survey = 6 if strpos(Education, "niversity") > 0
replace Education_survey = 7 if Education == "Unknown"

label define Education_classi ///
    0 "Missing" ///
    1 "Illiterate/No formal education" ///
    2 "Elementary" ///
    3 "Preparatory/basic" ///
    4 "Secondary" ///
    5 "Mid-level" ///
    6 "University" ///
    7 "Unknown"
label values Education_survey Education_classi

* The string versions are no longer needed; the coded variables take the final names.
drop Education Job
rename (Education_survey Job_survey) (Education job)

* Survey country codes for the country of residence.
* The value label is the single source of the code-to-name mapping: the recode
* below reads each name back from the label, so the label text and the matched
* data value cannot drift apart. The label for code 17 is spelled "Saudi Arabia",
* matching the value used in Country_resid.
label define country_clas ///
    1  "Algeria" ///
    5  "Egypt" ///
    7  "Iraq" ///
    8  "Jordan" ///
    9  "Kuwait" ///
    10 "Lebanon" ///
    11 "Libya" ///
    13 "Morocco" ///
    15 "Palestine" ///
    17 "Saudi Arabia" ///
    19 "Sudan" ///
    21 "Tunisia" ///
    22 "Yemen"

* Countries without a code in the label stay missing.
gen country = .
foreach code of numlist 1 5 7 8 9 10 11 13 15 17 19 21 22 {
    local cname : label country_clas `code'
    replace country = `code' if Country_resid == "`cname'"
}

label values country country_clas

* Age relative to 2014; a missing year of birth gives a missing age.
gen age = 2014 - YearofBirth

* Labels for the survey-matched variables and for age.
label variable age       "age as of 2014"
label variable country   "country code matched to surveys"
label variable job       "Job matched to surveys"
label variable Education "Education matched to surveys"

/************************
ISO code for merger
*************************/

gen ISO_code = ""
label variable ISO_code "ISO code for country of residence"

* Each list element is "<3-letter code> <country name as spelled in Country_resid>".
* Characters 1-3 are the code and everything from character 5 on is the name,
* so names containing spaces are handled. Unlisted countries keep an empty code.
foreach pair in ///
    "AFG Afghanistan"    "ALB Albania"        "DZA Algeria"        "AUS Australia"    ///
    "AUT Austria"        "AZE Azerbaijan"     "BHR Bahrain"        "BEL Belgium"      ///
    "BIH Bosnia"         "BGR Bulgary"        "CMR Cameroun"       "CAN Canada"       ///
    "CHN China"          "DNK Denmark"        "EGY Egypt"          "FRA France"       ///
    "GEO Georgia"        "DEU Germany"        "IND India"          "IDN Indonesia"    ///
    "IRN Iran"           "IRQ Iraq"           "IRL Ireland"        "JOR Jordan"       ///
    "KAZ Kazakhstan"     "KEN Kenya"          "UNK Kosovo"         "KWT Kuwait"       ///
    "KGZ Kyrgyzstan"     "LBN Lebanon"        "LBY Libya"          "MKD Macedonia"    ///
    "MYS Malaysia"       "MRT Mauritania"     "MAR Morocco"        "NLD Netherlands"  ///
    "NOR Norway"         "PAK Pakistan"       "PSE Palestine"      "POL Poland"       ///
    "QAT Qatar"          "RUS Russia"         "SAU Saudi Arabia"   "SRB Serbia"       ///
    "SOM Somalia"        "ZAF South Africa"   "ESP Spain"          "SDN Sudan"        ///
    "SWE Sweden"         "CHE Switzerland"    "SYR Syria"          "TJK Tajikistan"   ///
    "TTO Trinidad"       "TUN Tunisia"        "TUR Turkey"         "TKM Turkmenistan" ///
    "UKR Ukraine"        "GBR United Kingdom" "USA United States"  "UZB Uzbekistan"   ///
    "YEM Yemen" {
    local iso  = substr("`pair'", 1, 3)
    local name = substr("`pair'", 5, .)
    replace ISO_code = "`iso'" if Country_resid == "`name'"
}


* Write the cleaned dataset in an older .dta format, overwriting any existing file.
* The file name is relative, so it is saved in the current working directory.
saveold "ISIS_fighters_new", replace
